library cml;
#include cml.ext;
CMLset;

__title=" ** A Beta Model, Numerical Solution **";
dta="/home/gov/faculty/ppaolino/parr/selden";
let dep=minelig;
let ind=efficncy respmin hardship minority;
let indV=0;
let sel=0;

c={1,0,0,0,0};

if ind==0;
   vars=dep;
   elseif indV==0; 
   vars=dep|ind;    
   else;
   vars=dep|ind|indV;
endif;

dataset=listw(dta,vars,sel);

stval=  -4.8037|
	 2.7896|
	 0.1345|
	 2.6309|
	 0.1754|
	 1.2412;
	 
proc psi(x);
  local p;
  x=x+6;
  p=1/(x.*x);
  p=(((0.004166666666667*p-0.003968253986254).*p+
      0.008333333333333).*p-0.083333333333333).*p;
  p=p+ln(x)-0.5/x-1/(x-1)-1/(x-2)-1/(x-3)-1/(x-4)-1/(x-5)-1/(x-6);
retp(p);
endp;

proc gradproc(theta,ds);
    local y,one,x,z,be,h,xb,zh,zh1,e,per1,gr1,gr2,grad;
    y=(ds[.,1]+.01)*100/10002;
    one=ones(rows(ds),1);
if ind==0;
    x=one;
    else;
    x=one~ds[.,2:rows(ind)+1];
endif;
if indV==0;
    z=one;	     
    else;
    z=one~ds[.,cols(ds)-rows(indV)+1:cols(ds)];
endif;		     
    be=theta[1:cols(x),1];
    h=trimr(theta,rows(be),0);      
    xb=exp(x*be);
    zh=exp(z*h);
    zh1=zh+1;
    e=xb./(1+xb);
    per1=(-x.*e+x.*(e^2)).*zh1;

    gr1=psi(e.*zh1+(1-e).*zh1-1).*(x.*e.*zh1-(e^2).*zh1.*x+per1)-
        psi(e.*zh1-e).*(x.*e.*zh1-(e^2).*zh1.*x-x.*e+(e^2).*x)-
        psi((1-e).*zh1-1+e).*(per1+x.*e-(e^2).*x)+
        (x.*e.*zh1-(e^2).*zh1.*x-x.*e+(e^2).*x).*ln(y)+
        (per1+x.*e-(e^2).*x).*ln(1-y);
    gr2=psi(e.*zh1+(1-e).*zh1-1).*(e.*z.*zh+(1-e).*z.*zh)-
        (psi(e.*zh1-e).*xb.*z.*zh)./(1+xb)-
        psi((1-e).*zh1-1+e).*(1-e).*z.*zh+xb.*z.*zh.*ln(y)./(1+xb)+
        (1-e).*z.*zh.*ln(1-y);
    grad=gr1~gr2;
    retp(grad);endp;

proc loglik(theta,ds);
    local y,one,x,z,be,h,xb,zh,e,v,a,b,llik,ds1;
		     
    y=(ds[.,1]+.01)*100/10002;
    one=ones(rows(ds),1);
if ind==0;
    x=one;
    else;
    x=one~ds[.,2:rows(ind)+1];
endif;
if indV==0;
    z=one;	     
    else;
    z=one~ds[.,cols(ds)-rows(indV)+1:cols(ds)];
endif;		     
    be=theta[1:cols(x),1];
    h=trimr(theta,rows(be),0);      
    xb=exp(x*be);
    zh=exp(z*h);
    e=xb./(1+xb);
    v=e.*(1-e).*(1/(zh+1));
    a=((e^2).*(1-e))./v-e;
    b=(e.*(1-e)^2)./v-(1-e);
    llik=lng(a+b)-lng(a)-lng(b)+(a-1).*ln(y)+(b-1).*ln(1-y);
    retp(llik);endp;

    if ind==0;
      _cml_ParNames="const"|"const";
    elseif indV==0;
      _cml_ParNames="const"|ind|"const";
    else;
      _cml_ParNames="const"|ind|"const"|indV;
    endif;

/* _cml_GradCheck=1;
_cml_GradProc=&gradproc;  */
_cml_Options = { none }; 
_cml_DirTol=0.00001; 

t=dta $+ dep $+ ".dir.out"; 
output file=^t reset; 
    print "dependent variable=" ;; $dep; 
    {b,logl,g,vc,ret}=CMLprt(CML(dataset,0,&loglik,stval));
@ cond(_cml_FinalHess); @

/* set ind and indV matrices */
y=(dataset[.,1]+.01)*100/10002;
one=ones(rows(dataset),1);

if ind==0;
  x=one;
  else;
  x=ones(rows(dataset),1)~dataset[.,2:rows(ind)+1]; 
endif;

if indV==0;
  z=one;
  else;
  z=ones(rows(dataset),1)~dataset[.,cols(dataset)-rows(indV)+1:cols(dataset)]; 
endif;		     

/* get predicted values of y */
    bb=trimr(b,0,1);
    if indV/=0;
       bb=trimr(b,0,rows(indV)+1);
    endif;
    xb=exp(x*bb);
    eyb=xb./(1+xb);

/* set descriptives for x and z */
meanx=meanc(x);  @ these are k1X1 vectors @
stdx=stdc(x);	
minx=minc(x);	
maxx=maxc(x);	

meanz=meanc(z);

/* create switching parameter */

if ind/=0;
sw=zeros(1,cols(x)-1)|eye(cols(x)-1);
xl=meanx-stdx.*sw;           @ these are kXk-1 vectors @
xh=meanx+stdx.*sw;       
xmin=meanx.*(1-sw)+minx.*sw;  
xmax=meanx.*(1-sw)+maxx.*sw;
endif;

/* generate the parameter vectors */
    be=b[1:cols(x),.];      @ chop off parms for alpha (k1X1) @
    h=trimr(b,rows(be),0);  @ chop off parms for beta (k2X1) @

/* get predicted means */

    em=(exp(meanx'*be)./(1+exp(meanx'*be)))'; @ these are nX1 vectors @

    if ind/=0;
      emin=(exp(xmin'*be)./(1+exp(xmin'*be)))';
      emax=(exp(xmax'*be)./(1+exp(xmax'*be)))';
      el=(exp(xl'*be)./(1+exp(xl'*be)))';
      eh=(exp(xh'*be)./(1+exp(xh'*be)))';
    endif;
  
/* get first differences for the mean */
    if ind/=0;
    fdm2=eh-el;
    fdmm=emax-emin;
    endif;
    
/* get predicted variances for variables affecting the mean */

  betasim=rndmn(b,vc,1000);
  bs=betasim[.,1:cols(x)];
  hs=betasim[.,cols(x)+1:cols(betasim)];

  ml=(exp(bs*xl)./(1+exp(bs*xl)));
  mh=(exp(bs*xh)./(1+exp(bs*xh)));
  m0=(exp(bs*xmin)./(1+exp(bs*xmin)));
  m1=(exp(bs*xmax)./(1+exp(bs*xmax)));
  
@  if indv/=0; @
    j=1; i=1; hx=zeros(1000,rows(c)); 
    do until j>rows(c);
       if c[j]==1;
	 hx[.,j]=hx[.,j]+hs[.,i];
	 i=i+1;
       endif;
    j=j+1;
    endo;
@  endif; @

    dm=(1/(exp(meanz'*h)+1));
    vm=em.*(1-em).*dm;

@  if indV/=0; @
      dmin=(1/(exp(hx*xmin)+1));
      dmax=(1/(exp(hx*xmax)+1));
      dl=(1/(exp(hx*xl)+1));
      dh=(1/(exp(hx*xh)+1));
      vmin=m0.*(1-m0).*dmin;
      vmax=m1.*(1-m1).*dmax;
      vl=ml.*(1-ml).*dl;
      vh=mh.*(1-mh).*dh;

      fdv2=vh-vl;
      fdvm=vmax-vmin;
@  endif; @

  if ind/=0;
    print $ind;; meanc(fdm2)~meanc(fdmm);
  endif;
  
@  if indV/=0; @
    print $ind;; meanc(fdv2)~stdc(fdv2)~meanc(fdvm)~stdc(fdvm);
@  endif; @
    
format /rd 8,5;

print "alpha=";; ((em^2).*(1-em))./vm-em;
print "beta=";; (em.*(1-em)^2)./vm-(1-em);
alp=((em^2).*(1-em))./vm-em;
bet=(em.*(1-em)^2)./vm-(1-em);
print "ave mean=";; alp/(alp+bet);
print "ave var=";; alp*bet/((alp+bet)^2*(alp+bet+1)); 

/* run ols */

{ vnam,m,bols,stb,vcols,stderr,sigma,cx,rsq,resid,dbw } = ols(0,y,x);

/* generate first differences for ols */
  olsl=xl'*bols;
  olsh=xh'*bols;
  
  olsh-olsl;

/* run ols with logit transformation*/

yt=ln(y./(1-y));

{ vnam,m,bolsl,stbl,vcols,stderrl,sigma,cx,rsq,resid,dbw } = ols(0,yt,x);

/* generate first differences for ols */
  olsl=(exp(xl'*bolsl))./(1+exp(xl'*bolsl));
  olsh=(exp(xh'*bolsl))./(1+exp(xh'*bolsl)); 
  
  olsh-olsl;

  ely=(exp(x*bolsl))./(1+exp(x*bolsl));
  
let pe=perinst;
if rows(y)==50 and stof(dep)/=stof(pe);
/* run ols for logged model */
  
let depl=lpemp;
let indl=ltask lwealth rights lelite lcol;
varsl=depl|indl;
dataset2=listw(dta,varsl,sel);
ly=(dataset2[.,1]);
lx=ones(rows(ly),1)~(dataset2[.,2:cols(dataset2)]);

{ lvnam,lm,lbols,lstb,lvcols,lstderr,lsigma,lcx,lrsq,lresid,ldbw } = ols(0,ly,lx);


/* set descriptives for x and z */
meanlx=meanc(lx);  @ these are k1X1 vectors @
stdlx=stdc(lx);	
minlx=minc(lx);	
maxlx=maxc(lx);	

/* create switching parameter */

if ind/=0;
lsw=zeros(1,cols(lx)-1)|eye(cols(lx)-1);
lxl=meanlx-stdlx.*lsw;           @ these are kXk-1 vectors @
lxh=meanlx+stdlx.*lsw;       
lxmin=meanlx.*(1-lsw)+minlx.*lsw;  
lxmax=meanlx.*(1-lsw)+maxlx.*lsw;
endif;

/* generate first differences for logged ols */

  lolsl=(exp(lxl'*lbols)-1)*100/10101;
  lolsh=(exp(lxh'*lbols)-1)*100/10101;
  
  lolsh-lolsl; 

/* get predicted data */
predly=lx*lbols;
predyl=(exp(predly)-1)*100/10101;
yadj=(exp(ly)-1)*100/10101;
minc(predyl)~maxc(predyl);
minc(yadj)~maxc(yadj);
msel=sumc((yadj-predyl)^2)/rows(y); 
endif;

/* compare mse of y */
mseb=sumc((y-eyb)^2)/rows(y);
mseo=sumc((y-x*bols)^2)/rows(y);
mset=sumc((y-ely)^2)/rows(y);

print "extreme vals of y,Ey_beta,Ey_OLS,Ey_lnOLS";
minc(y)~maxc(y);
minc(eyb)~maxc(eyb);
minc(x*bols)~maxc(x*bols);
minc(ely)~maxc(ely);
if rows(y)==50 and stof(dep)/=stof(pe);
minc(predyl)~maxc(predyl);
endif;
print " percentage of OLS and lnOLS falling outside of [0,1]";
sumc(x*bols.>1)/rows(y);
sumc(x*bols.<0)/rows(y);
sumc(ely.>1)/rows(y);
sumc(ely.<0)/rows(y);
if rows(y)==50 and stof(dep)/=stof(pe);
sumc(predyl.>1)/rows(y);
sumc(predyl.<0)/rows(y); 
endif;

rbro=mseb/mseo;
print "Beta is better if rb/ro<1";;rbro;
print "beta mse=";;mseb;
print "ols mse=";;mseo;
rbrt=mseb/mset;
print "Beta is better than logit trans if rb/ro<1";;rbrt;
if rows(y)==50 and stof(dep)/=stof(pe);
rbrl=mseb/msel; 
print "Beta is better if rb/rl<1";;rbrl; 
endif;

y~eyb~(x*bols)~ely;
output off;
